Imports
library(evd);
library(evdbayes);
library(coda);
library(ismev);
library(xts);
Loading the Data
load("../data/CAPE_Minder_Rychener_Malsot.RData")
load("../data/NINO34.RData")
load("../data/SRH_Minder_Rychener_Malsot.RData")
Generate PROD
prod = sqrt(cape)*srh
** Create Time Series Objects **
dates <- seq.Date(as.Date("1979-1-1"), as.Date("2015-12-31"), by=1)
feb29ix <- format(as.Date(dates), "%m-%d") == "02-29"
dates <- dates[!feb29ix]
prod_ts = xts(prod, order.by = rep(dates, each=8))
Beginning the Analysis
month_names = c("jan","feb","mar","apr","may","jun","jul","aug","sep","oct","nov","dec")
get_monthly = function(x) {
output = list()
len = nrow(x)
# Get month of element
month = time(x)
month = gsub("....-", "", month)
month = gsub("-..", "", month)
monthlist = unique(month)
for (i in 1:12) {
output[[i]] = x[month == monthlist[i],]
}
names(output) = month_names
return(output)
}
monthly_max = get_monthly(apply.monthly(prod_ts, max))
r = 2
r_monthly_max = get_monthly(apply.monthly(prod_ts, function(x) order(x, decreasing=T)[1:r]))
# get the monthly maxima of prod
m1 = as.data.frame(apply.monthly(prod_ts, max))$V1;
# produce the gumbel qq plot
gumbel_qq = function(x, title="") qqplot(qgumbel(c(1:length(x))/(length(x)+1)),
x,
main = paste("Gumbell Q-Q Plot", title),
xlab = "Theoretical Quantiles",
ylab = "Sample Quantiles") ;
gumbel_qq(m1)

#qqplot(qgumbel(c(1:length(m1))/(length(m1)+1)),m1,main = "Gumbell Q-Q Plot",xlab = "Theoretical Quantiles", ylab = "Sample Quantiles") ;
The qq plot doesn’t fit very well, especially in the lower tail. This is likely due to seasonal dependence.
Fitting GEV to the entire data
# fit gevd with MLE and produce diagnostic plots
fitmax.MLE<-fgev(m1,method="Nelder-Mead")
par(mfrow=c(2,2))
fitmax.MLE
Call: fgev(x = m1, method = "Nelder-Mead")
Deviance: 9272.599
Estimates
loc scale shape
7.519e+03 7.013e+03 4.757e-03
Standard Errors
loc scale shape
421.90201 331.43749 0.05347
Optimization Information
Convergence: successful
Function Evaluations: 171
plot(fitmax.MLE)

Poor fit, probably because the distribution isn’t stationary. This is especially visible in the Probability plot, in which the confidence band is surpassed, indicating a poor fit.
# fit gevd with Bayesian Techniques
# use the previous outputs (rounded) as initial values (use a different shape)
init<-c(1.6e4,4e3,0.1)
# arbitrary priors
mat <- diag(c(10000,10000,100))
pn <- prior.norm(mean=c(0,0,0),cov=mat)
# proposal standard deviation (found by trial and error to get 20%<acceptance rate<40%)
psd<-c(500,0.03,0.02)
# draw 3k samples from markov chain
nit = 30000
thinning = 300
post<-posterior(nit, init=init,prior=pn,lh="gev",data=m1,psd=psd)
# diagnostic plots
MCMC<-mcmc(post[1:nit %% thinning == 0, ], thin=300)
plot(MCMC)

attr(mcmc(post),"ar")
mu sigma xi total
acc.rates 0.24 0.72 0.70 0.55
ext.rates 0.00 0.00 0.01 0.00
#MCMC_stab <- mcmc(post, thin=50, start=1000)
acf(mcmc(post[1:nit %% thinning == 0, ]))

We observe that there seem to be no substantial issues from the autocorrelation plots.
apply(mcmc(post[1:nit %% thinning == 0, ]),2,mean)
mu sigma xi
216.8681802 11212.0488049 -0.1898789
apply(mcmc(post[1:nit %% thinning == 0, ]),2,sd)
mu sigma xi
101.68289973 737.97773473 0.04013345
** Fit with MLE for months separately**
#monthly_fits = lapply(monthly_max,
# function(x) fgev(data.frame(x)[,1], method="Nelder-Mead"))
monthly_fits = list()
error_cases = c(9, 12)
for (i in 1:length(monthly_max)) {
print(i)
if (i %in% error_cases) {
monthly_fits[[i]] = fgev(as.data.frame(monthly_max[[i]])$V1,
method = "Nelder-Mead",
std.err = FALSE)
}
else {
monthly_fits[[i]] = fgev(as.data.frame(monthly_max[[i]])$V1,
method = "Nelder-Mead")
}
}
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
optimization may not have succeeded
[1] 10
[1] 11
[1] 12
names(monthly_fits) = names(monthly_max)
gumbel_qq(data.frame(monthly_max[[9]])[,1], "September")

gumbel_qq(data.frame(monthly_max[[12]])[,1], "December")

get_se = function(x, ix) {
if (is.null(x$std.err)) 0.01
else x$std.err[ix]
}
mle_loc = unlist(lapply(monthly_fits, function(x) x$estimate[1]))
mle_loc_se = unlist(lapply(monthly_fits, get_se, 1))
mle_scale = unlist(lapply(monthly_fits, function(x) x$estimate[2]))
mle_scale_se = unlist(lapply(monthly_fits, get_se, 2))
mle_shape = unlist(lapply(monthly_fits, function(x) x$estimate[3]))
mle_shape_se = unlist(lapply(monthly_fits, get_se, 3))
plot_w_err = function(x, y, se, title = NULL) {
max_ix = which.max(y)
min_ix = which.min(y)
plot(x, y,
ylim = c(y[min_ix] - se[min_ix], y[max_ix] + se[max_ix]),
main = title)
arrows(x,y-se,x,y+se, code=3, length=0.02, angle = 90)
}
plot_w_err(1:12, mle_loc, mle_loc_se, "Location Parameter as Estimated with Likelihood")

plot_w_err(1:12, mle_scale, mle_scale_se, "Scale Parameter as Estimated with Likelihood")

plot_w_err(1:12, mle_shape, mle_shape_se, "Shape Parameter as Estimated with Likelihood")

** Fit with Bayesian Methods for months separately**
bayes_fitter = function(x,
init = c(1e3, 1e3, 0.1), # Initial values
mat = diag(c(10000,10000,100)),
psd = c(500,0.1,0.1), # Proposed SDev
nit = 3000, # Nb Iterations
thinning = 50, # Thinning Factor
do_plots = FALSE, # Bool whether to show plot
seed = 42 # Seed
) {
set.seed(seed)
pn = prior.norm(mean=c(0,0,0),cov=mat)
post = posterior(nit, init=init, prior=pn, lh="gev", data=x, psd=psd)
if(do_plots) {
MCMC<-mcmc(post[1:nit %% thinning == 0, ])
plot(MCMC)
}
list(posterior = post,
acceptance_rate = attr(mcmc(post),"ar"))
}
monthly_bayes_fit = lapply(monthly_max, bayes_fitter, do_plots = TRUE,
psd = c(500,0.3,0.3), nit=30000, thinning = 300)












acceptance_rates = lapply(monthly_bayes_fit, function(x) x$acceptance_rate[1,])
print(acceptance_rates)
$jan
mu sigma xi total
0.16 0.56 0.58 0.44
$feb
mu sigma xi total
0.24 0.54 0.44 0.40
$mar
mu sigma xi total
0.24 0.32 0.20 0.25
$apr
mu sigma xi total
0.23 0.19 0.15 0.19
$may
mu sigma xi total
0.24 0.17 0.11 0.17
$jun
mu sigma xi total
0.24 0.18 0.12 0.18
$jul
mu sigma xi total
0.23 0.12 0.09 0.14
$aug
mu sigma xi total
0.24 0.22 0.14 0.20
$sep
mu sigma xi total
0.23 0.15 0.11 0.16
$oct
mu sigma xi total
0.24 0.38 0.23 0.28
$nov
mu sigma xi total
0.24 0.46 0.33 0.34
$dec
mu sigma xi total
0.19 0.58 0.53 0.43
rr # fit the r largest method #rl = rlarg.fit(data) data(venice) venfit <- rlarg.fit(venice[,-1])
$conv
[1] 0
$nllh
[1] 1139.09
$mle
[1] 120.5479027 12.7840265 -0.1129418
$se
[1] 1.36234055 0.54944881 0.01986948
PART 2
rr #plot the maximum as a function of ENSO plot(nino34,m1,main = of Monthly Maxima of PROD vs ENSO,xlab = , ylab = Maxima of PROD)

rr n_years = length(m1)/12 month_indexes = rep(c(1,2,3,4,5,6,7,8,9,10,11,12),n_years) plot(month_indexes,m1,main = Maxima of PROD,xlab = , ylab = Maxima of PROD)

# plot the chi plot for dependance analysis
dat.m1_month = cbind(m1,month_indexes);
dat.m1_nino = cbind(m1,nino34);
chiplot(dat.m1_month);
chiplot(dat.m1_nino);
PART 3 We will now analyse temporal clustering of extremes. For this, we will use the exiplot function from the evd library.
# get sample quantiles (used as plotting thresholds)
prod.quantiles = quantile(prod, c(0.8, 0.85, 0.9, 0.95,0.99,0.999))
exiplot(prod,prod.quantiles,main = "Analyiss of Extremal clustering",xlab = "Threshold", ylab = "Extremal Index")
We observe that the extremal index is ~0.2, we can therefore conclude that we have strong dependance of extremes, with the limiting mean cluster size being roughly 5. The clustering has no effect for estimaters based on the (monthly) maximum, but the r largest estimator is influenced by it.
---
title: "Thunderstrom Analysis"
output: html_notebook
---
**Imports**
```{r}
library(evd); 
library(evdbayes);
library(coda);
library(ismev);
library(xts);

```


**Loading the Data**

```{r}
load("../data/CAPE_Minder_Rychener_Malsot.RData")
load("../data/NINO34.RData")
load("../data/SRH_Minder_Rychener_Malsot.RData")
```

**Generate PROD**
```{r}
prod = sqrt(cape)*srh
```




** Create Time Series Objects **
```{r}
dates <- seq.Date(as.Date("1979-1-1"), as.Date("2015-12-31"), by=1)
feb29ix <- format(as.Date(dates), "%m-%d") == "02-29"
dates <- dates[!feb29ix]

prod_ts = xts(prod, order.by = rep(dates, each=8))
```



**Beginning the Analysis**
```{r}
month_names = c("jan","feb","mar","apr","may","jun","jul","aug","sep","oct","nov","dec")
get_monthly = function(x) {
  output = list()
  len = nrow(x)
  
  # Get month of element
  month = time(x)
  month = gsub("....-", "", month)
  month = gsub("-..", "", month)
  monthlist = unique(month)
  for (i in 1:12) {
    output[[i]] = x[month == monthlist[i],]
  }
  names(output) = month_names
  return(output)
}

monthly_max = get_monthly(apply.monthly(prod_ts, max))
r = 2
r_monthly_max = get_monthly(apply.monthly(prod_ts, function(x) order(x, decreasing=T)[1:r]))
```


```{r}
# get the monthly maxima of prod
m1 = as.data.frame(apply.monthly(prod_ts, max))$V1;
# produce the gumbel qq plot
gumbel_qq = function(x, title="") qqplot(qgumbel(c(1:length(x))/(length(x)+1)),
                                         x,
                                         main = paste("Gumbell Q-Q Plot", title),
                                         xlab = "Theoretical Quantiles", 
                                         ylab = "Sample Quantiles") ;

gumbel_qq(m1)

#qqplot(qgumbel(c(1:length(m1))/(length(m1)+1)),m1,main = "Gumbell Q-Q Plot",xlab = "Theoretical Quantiles", ylab = "Sample Quantiles") ;
```
The qq plot doesn't fit very well, especially in the lower tail. This is likely due
to seasonal dependence.

**Fitting GEV to the entire data**
```{r}
# fit gevd with MLE and produce diagnostic plots
fitmax.MLE<-fgev(m1,method="Nelder-Mead")
par(mfrow=c(2,2))
fitmax.MLE
plot(fitmax.MLE)
```
Poor fit, probably because the distribution isn't stationary. This is especially 
visible in the Probability plot, in which the confidence band is surpassed, 
indicating a poor fit.


```{r}
# fit gevd with Bayesian Techniques
# use the previous outputs (rounded) as initial values (use a different shape)
init<-c(1.6e4,4e3,0.1)
# arbitrary priors
mat <- diag(c(10000,10000,100)) 
pn <- prior.norm(mean=c(0,0,0),cov=mat)
# proposal standard deviation (found by trial and error to get 20%<acceptance rate<40%)
psd<-c(500,0.03,0.02)
# draw 3k samples from markov chain
nit = 30000
thinning = 300
post<-posterior(nit, init=init,prior=pn,lh="gev",data=m1,psd=psd)
# diagnostic plots
MCMC<-mcmc(post[1:nit %% thinning == 0, ], thin=300) 
plot(MCMC) 
attr(mcmc(post),"ar")

```


```{r}
#MCMC_stab <- mcmc(post, thin=50, start=1000)
acf(mcmc(post[1:nit %% thinning == 0, ]))
```
We observe that there seem to be no substantial issues from the autocorrelation 
plots. 

```{r}
apply(mcmc(post[1:nit %% thinning == 0, ]),2,mean)
apply(mcmc(post[1:nit %% thinning == 0, ]),2,sd)

```

** Fit with MLE for months separately**
```{r}
#monthly_fits = lapply(monthly_max, 
#                      function(x) fgev(data.frame(x)[,1], method="Nelder-Mead"))
monthly_fits = list()
error_cases = c(9, 12)
for (i in 1:length(monthly_max)) {
  print(i)
  
  if (i %in% error_cases) {
    monthly_fits[[i]] = fgev(as.data.frame(monthly_max[[i]])$V1, 
                             method = "Nelder-Mead",
                             std.err = FALSE)
  }
  else {
    monthly_fits[[i]] = fgev(as.data.frame(monthly_max[[i]])$V1, 
                             method = "Nelder-Mead")
  }
}
names(monthly_fits) = names(monthly_max)

```

```{r}
gumbel_qq(data.frame(monthly_max[[9]])[,1], "September")
gumbel_qq(data.frame(monthly_max[[12]])[,1], "December")
```
```{r}
get_se = function(x, ix) {
  if (is.null(x$std.err)) 0
  else x$std.err[ix]
}
mle_loc = unlist(lapply(monthly_fits, function(x) x$estimate[1]))
mle_loc_se = unlist(lapply(monthly_fits, get_se, 1))
mle_scale = unlist(lapply(monthly_fits, function(x) x$estimate[2]))
mle_scale_se = unlist(lapply(monthly_fits, get_se, 2))
mle_shape = unlist(lapply(monthly_fits, function(x) x$estimate[3]))
mle_shape_se = unlist(lapply(monthly_fits, get_se, 3))
```

```{r}
plot_w_err = function(x, y, se, title = NULL) {
  max_ix = which.max(y)
  min_ix = which.min(y)
  plot(x, y,
       ylim = c(y[min_ix] - se[min_ix], y[max_ix] + se[max_ix]),
       main = title)
  arrows(x,y-se,x,y+se, code=3, length=0.02, angle = 90)
}
plot_w_err(1:12, mle_loc, mle_loc_se, "Location Parameter as Estimated with Likelihood")
plot_w_err(1:12, mle_scale, mle_scale_se, "Scale Parameter as Estimated with Likelihood")
plot_w_err(1:12, mle_shape, mle_shape_se, "Shape Parameter as Estimated with Likelihood")

```

** Fit with Bayesian Methods for months separately**
```{r}

bayes_fitter = function(x, 
                        init = c(1e3, 1e3, 0.1), # Initial values
                        mat = diag(c(10000,10000,100)),
                        psd = c(500,0.1,0.1), # Proposed SDev
                        nit = 3000, # Nb Iterations
                        thinning = 50, # Thinning Factor
                        do_plots = FALSE, # Bool whether to show plot
                        seed = 42 # Seed 
                        ) {
  set.seed(seed)                
  pn = prior.norm(mean=c(0,0,0),cov=mat)
  post = posterior(nit, init=init, prior=pn, lh="gev", data=x, psd=psd)
  
  if(do_plots) {
    MCMC<-mcmc(post[1:nit %% thinning == 0, ]) 
    plot(MCMC) 
  }
  list(posterior = post, 
       acceptance_rate = attr(mcmc(post),"ar"))
}

monthly_bayes_fit = lapply(monthly_max, bayes_fitter, do_plots = TRUE,
                           psd = c(500,0.3,0.3), nit=30000, thinning = 300)
acceptance_rates = lapply(monthly_bayes_fit, function(x) x$acceptance_rate[1,])
print(acceptance_rates)

```



```{r}
# fit the r largest method
#rl = rlarg.fit(data)
data(venice)
venfit <- rlarg.fit(venice[,-1])
```


**PART 2**
```{r}
#plot the maximum as a function of ENSO
plot(nino34,m1,main = "Comparison of Monthly Maxima of PROD vs ENSO",xlab = "ENSO", ylab = "Monthly Maxima of PROD")
n_years = length(m1)/12
month_indexes = rep(c(1,2,3,4,5,6,7,8,9,10,11,12),n_years)
plot(month_indexes,m1,main = "Monthly Maxima of PROD",xlab = "Month", ylab = "Monthly Maxima of PROD")
```
```{r}
# plot the chi plot for dependance analysis
dat.m1_month = cbind(m1,month_indexes);
dat.m1_nino = cbind(m1,nino34);
chiplot(dat.m1_month);
chiplot(dat.m1_nino);
```



**PART 3**
We will now analyse temporal clustering of extremes. For this, we will use the exiplot function from the evd library.

```{r}
# get sample quantiles (used as plotting thresholds)
prod.quantiles = quantile(prod, c(0.8, 0.85, 0.9, 0.95,0.99,0.999))
exiplot(prod,prod.quantiles,main = "Analyiss of Extremal clustering",xlab = "Threshold", ylab = "Extremal Index")
```

We observe that the extremal index is ~0.2, we can therefore conclude that we have strong dependance of extremes, with the limiting mean cluster size being roughly 5. The clustering has no effect for estimaters based on the (monthly) maximum, but the r largest estimator is influenced by it.
